#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include <mpi.h>

/*
 * Parses `text` as a base-10 integer restricted to [min, max].
 * Returns 1 and stores the value in *out on success; returns 0 (leaving *out
 * untouched) on empty input, trailing garbage, overflow or an out-of-range
 * value.
 */
static int parse_int_arg(const char *text, long min, long max, int *out)
{
    char *end = NULL;

    errno = 0;
    long value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE ||
        value < min || value > max) {
        return 0;
    }

    *out = (int)value;
    return 1;
}

/*
 * Allocates zero-initialised storage for `count` ints. At least one element
 * is always requested so that a zero-length request never yields NULL.
 * On failure the whole MPI job is aborted: a single rank quietly exiting
 * would leave its peers blocked in the broadcast or in the receives.
 * The caller owns the returned buffer and must free() it.
 */
static int *alloc_ints(int count)
{
    int *block = calloc(count > 0 ? (size_t)count : 1, sizeof *block);

    if (block == NULL) {
        fprintf(stderr, "Out of memory while allocating %d ints\n", count);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        exit(EXIT_FAILURE); /* in case MPI_Abort returns */
    }
    return block;
}

/*
 * Parallel rank sort.
 *
 * Usage: <program> N P
 *   N - number of elements to sort (non-negative)
 *   P - expected number of MPI processes; must match the communicator size
 *
 * Rank 0 builds the descending vector N+5, N+4, ..., 6 and broadcasts it.
 * Every process computes, for its contiguous slice of indices, the final
 * position of each element (the count of elements that must precede it) and
 * sends the (value, position) pairs to rank 0, which assembles the sorted
 * vector and prints it on one line.
 *
 * Returns 0 on success and EXIT_FAILURE when the arguments are invalid or P
 * does not match the number of processes.
 */
int main(int argc, char *argv[]) {
    int N, P;
    int rank, size;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* Every rank sees the same argv, so all of them take the same exit path. */
    if (argc != 3) {
        if (rank == 0) {
            fprintf(stderr, "Usage: %s N P\n", argc > 0 ? argv[0] : "ranksort");
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    /* N is capped at INT_MAX - 5 because the largest generated value is N + 5. */
    if (!parse_int_arg(argv[1], 0, INT_MAX - 5, &N) ||
        !parse_int_arg(argv[2], 1, INT_MAX, &P)) {
        if (rank == 0) {
            fprintf(stderr,
                    "N must be a non-negative integer and P a positive integer\n");
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    if (P != size) {
        if (rank == 0) {
            printf("Number of MPI processes must be equal to P\n");
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    /* Every rank needs the full vector; only rank 0 needs the output buffer. */
    int *v = alloc_ints(N);
    int *sorted = NULL;

    if (rank == 0) {
        sorted = alloc_ints(N);
        for (int i = 0; i < N; i++) {
            v[i] = N - i + 5;
        }
    }

    /* Broadcast the input vector from rank 0 to everyone. */
    MPI_Bcast(v, N, MPI_INT, 0, MPI_COMM_WORLD);

    /*
     * Split the index range [0, N) into P contiguous slices; the first `rem`
     * ranks take one extra element so the slice sizes differ by at most one.
     */
    int base = N / P;
    int rem = N % P;

    int start = rank * base + (rank < rem ? rank : rem);
    int count = base + (rank < rem ? 1 : 0);
    int end = start + count;

    /* Values of the local slice and the final position computed for each. */
    int *local_sorted = alloc_ints(count);
    int *local_pos = alloc_ints(count);

    for (int i = start; i < end; i++) {
        /*
         * The final position of v[i] is the number of elements that must come
         * before it: strictly smaller ones, plus equal ones with a lower
         * index. The index tie-break gives every element a distinct position.
         */
        int r = 0;
        for (int j = 0; j < N; j++) {
            if (v[j] < v[i] || (v[j] == v[i] && j < i)) {
                r++;
            }
        }
        local_sorted[i - start] = v[i];
        local_pos[i - start] = r;
    }

    if (rank != 0) {
        /* Ship the fragment to rank 0: tag 0 carries values, tag 1 positions. */
        MPI_Send(local_sorted, count, MPI_INT, 0, 0, MPI_COMM_WORLD);
        MPI_Send(local_pos, count, MPI_INT, 0, 1, MPI_COMM_WORLD);
    } else {
        /* Rank 0 places its own fragment first. */
        for (int i = 0; i < count; i++) {
            sorted[local_pos[i]] = local_sorted[i];
        }

        /* One pair of receive buffers, sized for the largest slice, is reused
         * for every sender. */
        int max_count = base + (rem > 0 ? 1 : 0);
        int *recv_vals = alloc_ints(max_count);
        int *recv_pos = alloc_ints(max_count);

        for (int p = 1; p < P; p++) {
            /* Must mirror the slice size that process p computed for itself. */
            int p_count = base + (p < rem ? 1 : 0);

            MPI_Recv(recv_vals, p_count, MPI_INT, p, 0, MPI_COMM_WORLD,
                     MPI_STATUS_IGNORE);
            MPI_Recv(recv_pos, p_count, MPI_INT, p, 1, MPI_COMM_WORLD,
                     MPI_STATUS_IGNORE);

            for (int i = 0; i < p_count; i++) {
                sorted[recv_pos[i]] = recv_vals[i];
            }
        }

        free(recv_vals);
        free(recv_pos);

        /* Only rank 0 prints the result. */
        for (int i = 0; i < N; i++) {
            printf("%d ", sorted[i]);
        }
        printf("\n");
    }

    free(local_sorted);
    free(local_pos);
    free(v);
    free(sorted); /* NULL on ranks other than 0; free(NULL) is a no-op */

    MPI_Finalize();
    return 0;
}
